ohibc logo
OHI British Columbia | OHI Science | Citation policy

knitr::opts_chunk$set(fig.width = 6, fig.height = 4, fig.path = 'Figs/',
                      echo = TRUE, message = FALSE, warning = FALSE)

library(raster)
library(data.table)


source('~/github/src/R/common.R')  ###
  ### includes library(tidyverse); library(stringr); dir_M points to ohi directory

dir_git <- '~/github/spp_health_dists'

dir_am_data <- file.path(dir_M, 'git-annex/globalprep/_raw_data/aquamaps/d2017')

### goal specific folders and info
dir_setup   <- file.path(dir_git, 'data_setup')
dir_o_anx <- file.path(dir_O, 'git-annex/spp_health_dists')
# ### provenance tracking
library(provRmd); prov_setup()

### support scripts
source('~/github/src/R/rast_tools.R')
  ### raster plotting and analyzing scripts

1 Summary

Create a map of the distribution of current species health - all species in AquaMaps assessed by IUCN (using AquaMaps version of IUCN ID numbers)

2 Data Sources

3 Methods

3.1 Spatial distribution of current extinction risk

3.1.1 Aggregate mean risk by cell

Here we will use a probability threshold of 0.60 to determine species presence. We filter out any species with NA for iucn_sid or cat_score.

cell_risk_file <- file.path(dir_git, 'data', 'current_risk_by_loiczid.csv')

if(!file.exists(cell_risk_file)) {

  iucn_to_am_lookup <- fread(file.path(dir_setup, 'int', 'am_iucn_crosslisted_spp.csv')) %>%
    select(am_sid = speciesid, iucn_sid = iucn_id) %>%
    distinct()
  
  spp_current_risk <- fread(file.path(dir_git, 'data', 'iucn_spp_cat_current.csv')) %>%
    select(iucn_sid, cat, cat_score) %>%
    left_join(iucn_to_am_lookup, by = 'iucn_sid') %>%
    as.data.table()

  spp_cells <- fread(file.path(dir_o_anx, 'am_files', 'am_spp_cells.csv'))
  
  git_prov(file.path(dir_setup, 'int', 'am_iucn_crosslisted_spp.csv'), filetype = 'input')
  git_prov(file.path(dir_git, 'data', 'iucn_spp_cat_current.csv'), filetype = 'input')
  git_prov(file.path(dir_o_anx, 'am_files', 'am_spp_cells.csv'), filetype = 'input')

  ### using data.table join:
  spp_cells_risk <- spp_current_risk[spp_cells, on = 'am_sid']

  spp_cells_risk_sum <- spp_cells_risk %>%
    filter(!is.na(cat_score) & !is.na(iucn_sid)) %>%
    filter(prob >= 0.60) %>%
    group_by(loiczid) %>%
    summarize(n_spp = n(), mean_risk = mean(cat_score))
  
  write_csv(spp_cells_risk_sum, cell_risk_file)
  
} else {
  
  git_prov(file.path(dir_setup, 'int', 'am_iucn_crosslisted_spp.csv'), filetype = 'input')
  git_prov(file.path(dir_git, 'data', 'iucn_spp_cat_current.csv'), filetype = 'input')
  git_prov(file.path(dir_o_anx, 'am_files', 'am_spp_cells.csv'), filetype = 'input')
  git_prov(cell_risk_file, filetype = 'output')
  
}

3.1.2 Load raster and substitute mean risks

loiczid_rast <- raster(file.path(dir_git, 'data/loiczid_raster.tif'))

### gotta force the mean_risk column to be double; there are lots of zero
### values so will default to thinking that column is integer.
risk_by_cell <- read_csv(file.path(dir_git, 'data', 'current_risk_by_loiczid.csv'),
                         col_types = 'iid')

risk_rast <- subs(loiczid_rast, risk_by_cell, by = 'loiczid', which = 'mean_risk')
nspp_rast <- subs(loiczid_rast, risk_by_cell, by = 'loiczid', which = 'n_spp')

plot(risk_rast)

plot(nspp_rast)

risk_clipped <- risk_rast
values(risk_clipped)[values(nspp_rast) < 5] <- NA

plot(risk_clipped)

3.2 Spatial distribution of trend in extinction risk

3.2.1 Aggregate mean trend by cell

Again we will use a probability threshold of 0.60 to determine species presence. We filter out any species with NA for iucn_sid or trend_score.

cell_trend_file <- file.path(dir_git, 'data', 'spp_trend_by_loiczid.csv')

if(!file.exists(cell_trend_file)) {

  iucn_to_am_lookup <- fread(file.path(dir_setup, 'int', 'am_iucn_crosslisted_spp.csv')) %>%
    select(am_sid = speciesid, iucn_sid = iucn_id) %>%
    distinct()
  
  spp_trend <- fread(file.path(dir_git, 'data', 'trend_by_spp.csv')) %>%
    left_join(iucn_to_am_lookup, by = 'iucn_sid') %>%
    as.data.table()

  spp_cells <- fread(file.path(dir_o_anx, 'am_files', 'am_spp_cells.csv'))
  
  git_prov(file.path(dir_setup, 'int', 'am_iucn_crosslisted_spp.csv'), filetype = 'input')
  git_prov(file.path(dir_git, 'data', 'trend_by_spp.csv'), filetype = 'input')
  git_prov(file.path(dir_o_anx, 'am_files', 'am_spp_cells.csv'), filetype = 'input')

  ### using data.table join:
  spp_cells_trend <- spp_trend[spp_cells, on = 'am_sid']

  spp_cells_trend_sum <- spp_cells_trend %>%
    filter(!is.na(trend_score) & !is.na(iucn_sid)) %>%
    filter(prob >= 0.60) %>%
    group_by(loiczid) %>%
    summarize(n_spp = n(), mean_trend = mean(trend_score))
  
  write_csv(spp_cells_trend_sum, cell_trend_file)
  
} else {
  
  git_prov(file.path(dir_setup, 'int', 'am_iucn_crosslisted_spp.csv'), filetype = 'input')
  git_prov(file.path(dir_git, 'data', 'trend_by_spp.csv'), filetype = 'input')
  git_prov(file.path(dir_o_anx, 'am_files', 'am_spp_cells.csv'), filetype = 'input')
  git_prov(cell_trend_file, filetype = 'output')
  
}

3.3 Spatial distribution of time-series extinction risk

3.3.1 Aggregate mean risk by cell for selected years

Here we will use a probability threshold of 0.60 to determine species presence. We filter out any species with NA for iucn_sid or cat_score.

Using the time-series category scores, cast forward and back to fill out all year time series; then select to specific years and save.

iucn_to_am_lookup <- fread(file.path(dir_setup, 'int', 'am_iucn_crosslisted_spp.csv')) %>%
  select(am_sid = speciesid, iucn_sid = iucn_id) %>%
  distinct()

spp_ts_risk <- fread(file.path(dir_git, 'data', 'iucn_spp_cat_timeseries.csv')) %>%
  select(iucn_sid, year, cat_ts_score) %>%
  group_by(iucn_sid) %>%
  mutate(n_assess = n()) %>%
  complete(year = 1965:2017) %>%
  fill(cat_ts_score, .direction = 'down') %>%
  ungroup() %>%
  filter(!is.na(iucn_sid) & !is.na(cat_ts_score)) %>%
  left_join(iucn_to_am_lookup, by = 'iucn_sid') %>% 
  as.data.table()

git_prov(file.path(dir_setup, 'int', 'am_iucn_crosslisted_spp.csv'), filetype = 'input')
git_prov(file.path(dir_git, 'data', 'iucn_spp_cat_timeseries.csv'), filetype = 'input')

spp_assessed <- spp_ts_risk %>%
  group_by(year) %>%
  mutate(n_spp = n()) %>%
  ungroup()
ggplot(spp_assessed, aes(x = year, y = n_spp)) +
  ggtheme_plot() +
  geom_line() +
  labs(title = 'Number of assessed species over time',
       x = 'year', y = 'total assessed species')

spp_cells <- fread(file.path(dir_o_anx, 'am_files', 'am_spp_cells.csv')) %>%
  filter(prob >= .60)
## 
Read 0.0% of 15023644 rows
Read 13.5% of 15023644 rows
Read 26.0% of 15023644 rows
Read 39.7% of 15023644 rows
Read 53.7% of 15023644 rows
Read 67.6% of 15023644 rows
Read 81.6% of 15023644 rows
Read 93.9% of 15023644 rows
Read 15023644 rows and 3 (of 3) columns from 0.312 GB file in 00:00:12
git_prov(file.path(dir_o_anx, 'am_files', 'am_spp_cells.csv'), filetype = 'input')

### Load raster and substitute mean risks

loiczid_rast <- raster(file.path(dir_git, 'data/loiczid_raster.tif'))

for(yr in c(1990, 1995, 2000, 2005, 2010:2017)) { ### yr <- 1990
    
  risk_df <- spp_ts_risk %>%
    filter(year == yr) %>%
    left_join(spp_cells, by = 'am_sid') %>%
    group_by(loiczid) %>%
    summarize(mean_risk = mean(cat_ts_score),
              n_spp = n())
  
  risk_rast <- subs(loiczid_rast, risk_df, by = 'loiczid', which = 'mean_risk')
  nspp_rast <- subs(loiczid_rast, risk_df, by = 'loiczid', which = 'n_spp')
  
  plot(risk_rast, main = paste0('Mean risk, ', yr))
  plot(nspp_rast, main = paste0('# assessed spp, ', yr))
  
  risk_clipped <- risk_rast
  values(risk_clipped)[values(nspp_rast) < 5] <- NA
  
  plot(risk_clipped, main = paste0('Mean risk, n_spp > 5, ', yr))
}


prov_wrapup(commit_outputs = FALSE)

4 Provenance

  • Run ID: 1 (f86f769); run tag: “standard run”
  • Run elapsed time: 190.195 seconds; run memory usage: 29096.1 MB
  • System info:
    • System: Linux, Release: 4.4.0-57-generic. Machine: x86_64. User: ohara.
    • R version: R version 3.4.3 (2017-11-30), Platform: x86_64-pc-linux-gnu, Running under: Ubuntu 14.04.5 LTS.
    • Attached base packages: stats, graphics, grDevices, utils, datasets, methods, base
    • Other attached packages: bindrcpp_0.2, provRmd_0.1.1, stringr_1.2.0, RColorBrewer_1.1-2, dplyr_0.7.2, purrr_0.2.3, readr_1.1.0, tidyr_0.7.0, tibble_1.3.4, ggplot2_2.2.1.9000, tidyverse_1.1.1, data.table_1.10.4-2, raster_2.5-8, sp_1.2-5
%3 9 rast_tools.R 10 am_iucn_crosslisted_spp.csv 2 4_current_health_map.Rmd#create_df_of_risk_by_cell 10->2 used 3 4_current_health_map.Rmd#create_df_of_trend_by_cell 10->3 used 4 4_current_health_map.Rmd#flesh_out_time_series 10->4 used 11 iucn_spp_cat_current.csv 11->2 used 12 am_spp_cells.csv 12->2 used 12->3 used 12->4 used 13 current_risk_by_loiczid.csv 5 4_current_health_map.Rmd#mean_risk_raster 13->5 used 14 loiczid_raster.tif 14->5 used 6 4_current_health_map.Rmd#mean_trend_raster 14->6 used 14->4 used 15 trend_by_spp.csv 15->3 used 16 spp_trend_by_loiczid.csv 16->6 used 17 iucn_spp_cat_timeseries.csv 17->4 used 1 4_current_health_map.Rmd 8 4_current_health_map.Rmd#setup 1->8 wasExecutedBy 1->2 wasExecutedBy 1->5 wasExecutedBy 1->3 wasExecutedBy 1->6 wasExecutedBy 1->4 wasExecutedBy 7 4_current_health_map.Rmd#prov_footer 1->7 wasExecutedBy 8->9 wasExecutedBy 2->13 wasGeneratedBy 3->16 wasGeneratedBy